This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(spatstat)
library(sp)
library(rgeos)
library(maptools)
library(GISTools)
install.packages("tidyverse")
library(tmap)
library(sf)
library(geojsonio)
library(tmaptools)
library(tidyverse)
library(rgdal)
library(raster)
library(SciViews)
library(base)
library(broom)
library(spdep)
####Basemap
library(tmap)
BoroughMap <- geojson_read("https://raw.githubusercontent.com/ft-interactive/geo-data/master/uk/london/london-wards-2014.geojson", what = "sp")
BNG = "+init=epsg:27700"
BoroughMapBNG <- spTransform(BoroughMap,BNG)
WGS = "+init=epsg:4326"
tmap_mode("view")
tm_shape(BoroughMapBNG) +tm_polygons(col = NA, alpha = 0.5)

#aggregate city of london 
library(sf)
library(dplyr)
library(rgdal)
BoroughMapSF <- st_as_sf(BoroughMapBNG)
library(rgeos)
CityofLondon<- gUnionCascaded(BoroughMapBNG[c(630:654), ])
plot(CityofLondon)
CityofLondonSF <- st_as_sf(CityofLondon)
CityofLondonSF$gss_code_ward<-"E05000001"  ##add same columns in city of london, for aggregating with other borough
CityofLondonSF$gss_code_borough <- "E09000001"
CityofLondonSF$borough <-"City Of London"
CityofLondonSF$ward <- "City Of London"
BoroughMapSFnew <- BoroughMapSF[-c(630:654), ]
LondonWardSF <- rbind(BoroughMapSFnew,CityofLondonSF) ##Attention New not new
LondonWard <- as(LondonWardSF,"Spatial")
rownames(LondonWardSF) <- 1:nrow(LondonWardSF)
plot(LondonWard)
tm_shape(LondonWard) +tm_polygons(col = NA, alpha = 0.5)
####population density (using https://data.london.gov.uk/dataset/land-area-and-population-density-ward-and-borough 2016)
library(tidyverse)
Density <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/population_density_2016_final.csv",na = "n/a")
#normalization population_per_square_km
normalization<-function(x){
return((x-min(x))/(max(x)-min(x)))}
density.normalization <- normalization(Density$`population density`)
Density$Density.normalization <- density.normalization

#join the data to basemap P3P27
LondonWard.join.popdens <- LondonWardSF%>% left_join(Density,by=c("gss_code_ward"="New Code"))
qtm(LondonWard.join.popdens,fill="Density.normalization")
#### coexistence of old and new building
#read building list
building <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/Listed Buildings/ListedBuildings_06Dec2018.shp",layer="ListedBuildings_06Dec2018")

library(maptools)
LondonBoundary<-unionSpatialPolygons(LondonWard,gss_code_borough)
plot(LondonBoundary)
### Public transport accessibility level(PTAL)
PTAL <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/Ward2014 Avpublic transport accessibility level2015.csv",na = "n/a")
#normalization PTAL
normalization<-function(x){
return((x-min(x))/(max(x)-min(x)))}
PTAL.normalization <- normalization(PTAL$AvPTAI2015)
PTAL$PTAL.normalization <- PTAL.normalization
#join the data to basemap P3P27
LondonWard.join.PTAL <- LondonWardSF%>% left_join(PTAL,by=c("gss_code_ward"="Ward Code"))
qtm(LondonWard.join.PTAL,fill="PTAL.normalization")
qtm(LondonWard.join.PTAL,fill="AvPTAI2015")

###Public open space accessibility level(POSAL)
POSAL <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/access_public_open_space_ward.csv",na = "n/a")
POSAL$POSAL.normalization <- normalization(POSAL$`Open Space`)
LondonWard.join.POSAL <- LondonWardSF%>% left_join(POSAL,by=c("gss_code_ward"="WD13CD"))
#qtm(LondonWard.join.POSAL,fill="POSAL.normalization")
#combine two accessibility sub-index
Access.normalization<- merge(POSAL,PTAL, by.x="WD13CD",by.y="Ward Code",all=TRUE)
#remove useless column and rows
Access.normalization[,c(4:8,10:11)] <- NULL
#calculate the mean of two accessibility level
Access.normalization$access.mean <- rowMeans(Access.normalization[c('PTAL.normalization', 'POSAL.normalization')], na.rm=TRUE)

economic vitality

###economic vitality
#CLASS 1  house price
#HousePrice <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/house_price_mean2016.csv",na = "n/a")
#HousePrice.normalization <- normalization(HousePrice$Value)
#HousePrice$HousePrice.normalization <- HousePrice.normalization
#CLASS 2 economic activity
EcoAct <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/economic_activity_ward.csv",na = "n/a")
EcoAct.normalization <- normalization(EcoAct$`Economically active: Total`)
EcoAct$EcoAct.normalization <- EcoAct.normalization

land use mix

### Land use mix
#https://en.wikipedia.org/wiki/Planning_use_classes_in_England 
# six classes--
landcover <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/greater-london-latest-free.shp/gis_osm_landuse_a_free_1.shp",layer="gis_osm_landuse_a_free_1")
landcoverBNG <- spTransform(landcover, BNG)
landcoverBNGSF <- st_as_sf(landcoverBNG)

AOIS <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/greater-london-latest-free.shp/gis_osm_pois_a_free_1.shp",layer="gis_osm_pois_a_free_1")
AOISBNG <- spTransform(AOIS, BNG)
AOISBNGSF <- st_as_sf(AOISBNG)

#find missing data in each column in landcoverBNGSF
colSums(is.na(landcoverBNGSF))
#get the classes of landcover
summary(landcoverBNGSF)
#CLASS1 greenspace
greenspaceBNGSF <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="park" |  landcoverBNGSF$fclass =="forest"|landcoverBNGSF$fclass =="nature_reserve"|landcoverBNGSF$fclass =="grass"),] 
greenspaceBNGSP <- as(greenspaceBNGSF,"Spatial")
#CLASS2 industrial and commercial
industrialBNGSF <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="industrial"| landcoverBNGSF$fclass =="commercial"),]
industrialBNGSP <- as(industrialBNGSF,"Spatial")
#CLASS 3 residential
residentialBNGSF <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="residential" ),]
residentialBNGSP <- as(residentialBNGSF,"Spatial")
#CLASS 4 recreation-leisure
recreationBNGSF1 <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="recreation_ground"| landcoverBNGSF$fclass =="allotments"),]
recreationBNGSF2 <- AOISBNGSF[ which(AOISBNGSF$fclass =="attraction"|AOISBNGSF$fclass =="theatre"|AOISBNGSF$fclass =="arts_centre"),]
recreationBNGSF <- rbind(recreationBNGSF2,recreationBNGSF1)
recreationBNGSP <- as(recreationBNGSF,"Spatial")


#CLASS 5 retail
retailBNGSF1 <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="retail" ),]
retailBNGSF2 <- AOISBNGSF[ which( AOISBNGSF$fclass =="retail" ),]
retialBNGSF <- rbind(retailBNGSF2,retailBNGSF1)
retailBNGSP <- as(retailBNGSF,"Spatial")

#CLASS 6--community services
communityBNGSF <- AOISBNGSF[ which(AOISBNGSF$fclass =="school"|AOISBNGSF$fclass =="hospital"|AOISBNGSF$fclass =="nursing_home"|AOISBNGSF$fclass =="university"  ),]
communityBNGSP <- as(communityBNGSF,"Spatial")

#CLASS 7--transport
transport <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/greater-london-latest-free.shp/road_polygon_merge.shp",layer="road_polygon_merge")
transportBNG <- spTransform(transport, BNG)
transportBNG <- transportBNG[transportBNG@data$AREA >10000,] 
# Assign the land use area for each subset. Here we first use the most widespread residential as example 
library(raster)
r <- raster(ncols=400, nrows=400) #generate raster size(quantity)
extent(r) <- extent(residentialBNGSP)  #Important! Assign the extent of raster to cover the same extents of the polygon
residential.Raster <- rasterize(residentialBNGSP,r,background=NA) #convert polygon to raster
#tmap_mode("view")
#qtm(residential.Raster)+tm_shape(LondonWard) +tm_polygons(col = NA, alpha = 0.5)
#extract rasters from polygons#(extract() cannot use method=bilinear in raster-in-polygon)
residential.extract <- raster::extract(residential.Raster,LondonWard,df=TRUE, weights =FALSE, na.rm = TRUE)
#remove rows with NA
row.has.na <- apply(residential.extract, 1, function(x){any(is.na(x))})
residential.extract2 <-residential.extract[!row.has.na,]
#merge the pixel in same ward(aggregate the frequency of the same data in one)
library(plyr)
residential.extract.sum <- factor(residential.extract2$ID)  
residential.extract.sum <- table(residential.extract2$ID)
residential.extract.wardsum <- as.data.frame(residential.extract.sum)
#create new order number for LondonWardSF, for merge the residential.extract and LondonWardSF by order
LondonWard2SF <- LondonWardSF
LondonWard2SF$Var1 <- 1:630
LondonWard2SF[,'Var1']<-factor(LondonWard2SF[,'Var1'])
#merge two dataframe
residential.extract.final <- merge(residential.extract.wardsum, LondonWard2SF, by="Var1",all=TRUE)
#residential.extract.final [!duplicated(residential.extract.final ), ]
#replace NA values with 0
residential.extract.final[is.na(residential.extract.final)] <- 0
#rename the column
colnames(residential.extract.final)[2] <- "Freq.residential"
#raster cell size
#we have to convert raster to polygon because of raster(). This function only compute the pixel area in longitude/latitude coordiante system
residential.repolygon <- rasterToPolygons(residential.Raster, fun=NULL, n=4, na.rm=TRUE, digits=12, dissolve=TRUE)
#qtm(residential.repolygon)
residential.repolygonSF <- st_as_sf(residential.repolygon) 
residential.repolygonSF$area <- st_area(residential.repolygonSF)
#so the single pixel area is 28999.54 [m^2](0.029 km^2)

land use mix functions

##based on the details above, we create functions for land use analysis
function.raster <- function(landsp){
  extent(r) <- extent(landsp)
  landsp.Raster <- rasterize(landsp,r,background=NA)
  return(landsp.Raster)
}

function.extract<- function(landsp.Raster){
  landsp.extract <- raster::extract(landsp.Raster,LondonWard,df=TRUE, weights =FALSE, na.rm = TRUE)
  row.has.na <- apply(landsp.extract, 1, function(x){any(is.na(x))})
  landsp.extract2 <-landsp.extract[!row.has.na,]
  landsp.extract.sum <- factor(landsp.extract2$ID)  
  landsp.extract.sum <- table(landsp.extract2$ID)
  landsp.extract.wardsum <- as.data.frame(landsp.extract.sum)
  landsp.extract.final <- merge(landsp.extract.wardsum, LondonWard2SF, by="Var1",all=TRUE)
  landsp.extract.final[is.na(landsp.extract.final)] <- 0
  return(landsp.extract.final)
}
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(index.dataframeSP )+
    tm_polygons(c("Index", "MixIndex","Density.normalization","EcoAct.normalization","access.mean")) +
    tm_facets(sync = TRUE, ncol = 1,nrow=5,drop.empty.facets = F)
The number of facets exceeds the limit of 4. The limit can be extended to 5 with:
tmap_options(limits = c(facets.view = 5))

Test for the relationship

Linear regression model

qplot(PointPerSK,Index,data=node.index.frame,geom = "point") + stat_smooth(method="lm", se=FALSE, size=1)
#fit the linear model
model <- lm(PointPerSK ~ Index, data =node.index.frame)
#residual dataframe
library(broom)
model_res <- tidy(model)
summary(model)

Call:
lm(formula = PointPerSK ~ Index, data = node.index.frame)

Residuals:
    Min      1Q  Median      3Q     Max 
-61.603 -18.377  -3.996  13.130 231.436 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   15.872      3.324   4.775 2.24e-06 ***
Index        374.667     14.973  25.022  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 27.99 on 628 degrees of freedom
Multiple R-squared:  0.4992,    Adjusted R-squared:  0.4984 
F-statistic: 626.1 on 1 and 628 DF,  p-value: < 2.2e-16
plot(model)

The coefficient (estimate) in the summary() table shows that(on average) for a 1 unit (day) increase in unauthorised absence from school, there is a reduction in the average GCSE point score of -29.874.

The p-values for the intercept and the coefficient are highly statistically significant (<0.001) so we can rely on the relationship that is being observed.

The adjusted R-squared statistic is 0.38, which tells us that 38% of the variation in GCSE scores across Wards in London can be explained by variation in unauthorised absence from school (which is quite a lot for a single variable).

Interrogating the last graph in plot(model1)which is a scatter plot of fitted values (the model estimates achieved by plugging the values and coefficients back into the regression equation) against standardised residuals, we can see no apparent patterns in the cloud of points, which suggests the model has not violated any important assumptions.

Test for spatial patterns

we shoud test that is there any spatial clustering of residuals.If these places cluster in space, then there might be some unobserved underlying factor causing this. This is important is it means that the assumption of independence that regression models rely upon might be violated. Now in our case here, there is not a clear violation of spatial independence, but it is certainly hinted at. test for spatial patterns (spatial autocorrelation) using the Moran’s I statistic

### Test for spatial patterns (spatial autocorrelation) using the Moran's I statistic
#copy a new LondonWardSF
LondonWardSF.pattern <- LondonWardSF
LondonWardSF.pattern$model_resids <- model$residuals
LondonWardSF.pattern <- LondonWardSF.pattern[!(is.na(LondonWardSF.pattern$model_resids) ),]
LondonWardSP.pattern <- as(LondonWardSF.pattern,"Spatial")
tmap_mode("view")
qtm(LondonWardSF.pattern,fill='model_resids')
moran.test(LondonWardSP.pattern@data$model_resids, SpatialWeight,zero.policy=T)

    Moran I test under randomisation

data:  LondonWardSP.pattern@data$model_resids  
weights: SpatialWeight  n reduced by no-neighbour observations
  

Moran I statistic standard deviate = 3.1963, p-value = 0.0006961
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     0.0719958817     -0.0015923567      0.0005300699 

so here we have a statistically significant but relatively weak indication that there is some spatial clustering of residual values. A value of 0.07 (1 being perfect spatial autocorrelation, 0 being none at all) shows that there is no evidence that high residual values cluster near high values and low residual values cluster near lower values.

multi linear regression model

model_multi <- lm(PointPerSK ~ MixIndex+Density.normalization + access.mean + EcoAct.normalization, data = node.index.frame)
summary(model_multi)

Call:
lm(formula = PointPerSK ~ MixIndex + Density.normalization + 
    access.mean + EcoAct.normalization, data = node.index.frame)

Residuals:
    Min      1Q  Median      3Q     Max 
-62.704 -15.816  -3.713  12.011 218.380 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           2.381e+01  6.546e+00   3.638 0.000298 ***
MixIndex              4.427e+07  1.951e+07   2.269 0.023615 *  
Density.normalization 1.506e+02  6.213e+00  24.245  < 2e-16 ***
access.mean           2.679e+01  7.021e+00   3.816 0.000149 ***
EcoAct.normalization  8.127e+00  6.392e+00   1.271 0.204039    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 26.27 on 625 degrees of freedom
Multiple R-squared:  0.5611,    Adjusted R-squared:  0.5583 
F-statistic: 199.8 on 4 and 625 DF,  p-value: < 2.2e-16
neighbour <- poly2nb(LondonWardSP.pattern,queen=T)
#create spatial weights object from these weights
SpatialWeight <- nb2listw(neighbour,style="C",zero.policy=T)
#Run moran I test
moran.test(LondonWardSP.pattern@data$model_resids_multi, SpatialWeight,zero.policy=T)

    Moran I test under randomisation

data:  LondonWardSP.pattern@data$model_resids_multi  
weights: SpatialWeight  n reduced by no-neighbour observations
  

Moran I statistic standard deviate = 2.1297, p-value = 0.0166
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     0.0473837384     -0.0015923567      0.0005288307 
---
title: "R Notebook"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

```{r}
library(spatstat)
library(sp)
library(rgeos)
library(maptools)
library(GISTools)
install.packages("tidyverse")
library(tmap)
library(sf)
library(geojsonio)
library(tmaptools)
library(tidyverse)
library(rgdal)
library(raster)
library(SciViews)
library(base)
library(broom)
library(spdep)
```


```{r}
####Basemap
library(tmap)
BoroughMap <- geojson_read("https://raw.githubusercontent.com/ft-interactive/geo-data/master/uk/london/london-wards-2014.geojson", what = "sp")
BNG = "+init=epsg:27700"
BoroughMapBNG <- spTransform(BoroughMap,BNG)
WGS = "+init=epsg:4326"
tmap_mode("view")
tm_shape(BoroughMapBNG) +tm_polygons(col = NA, alpha = 0.5)

#aggregate city of london 
library(sf)
library(dplyr)
library(rgdal)
BoroughMapSF <- st_as_sf(BoroughMapBNG)
library(rgeos)
CityofLondon<- gUnionCascaded(BoroughMapBNG[c(630:654), ])
plot(CityofLondon)
CityofLondonSF <- st_as_sf(CityofLondon)
CityofLondonSF$gss_code_ward<-"E05000001"  ##add same columns in city of london, for aggregating with other borough
CityofLondonSF$gss_code_borough <- "E09000001"
CityofLondonSF$borough <-"City Of London"
CityofLondonSF$ward <- "City Of London"
BoroughMapSFnew <- BoroughMapSF[-c(630:654), ]
LondonWardSF <- rbind(BoroughMapSFnew,CityofLondonSF) ##Attention New not new
LondonWard <- as(LondonWardSF,"Spatial")
rownames(LondonWardSF) <- 1:nrow(LondonWardSF)
plot(LondonWard)
tm_shape(LondonWard) +tm_polygons(col = NA, alpha = 0.5)


```

```{r}
####population density (using https://data.london.gov.uk/dataset/land-area-and-population-density-ward-and-borough 2016)
library(tidyverse)
Density <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/population_density_2016_final.csv",na = "n/a")
#normalization population_per_square_km
normalization<-function(x){
return((x-min(x))/(max(x)-min(x)))}
density.normalization <- normalization(Density$`population density`)
Density$Density.normalization <- density.normalization

#join the data to basemap P3P27
LondonWard.join.popdens <- LondonWardSF%>% left_join(Density,by=c("gss_code_ward"="New Code"))
qtm(LondonWard.join.popdens,fill="Density.normalization")
```

```{r}
#### coexistence of old and new building
#read building list
building <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/Listed Buildings/ListedBuildings_06Dec2018.shp",layer="ListedBuildings_06Dec2018")

library(maptools)
LondonBoundary<-unionSpatialPolygons(LondonWard,gss_code_borough)
plot(LondonBoundary)
```

```{r}
### Public transport accessibility level(PTAL)
PTAL <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/Ward2014 Avpublic transport accessibility level2015.csv",na = "n/a")
#normalization PTAL
normalization<-function(x){
return((x-min(x))/(max(x)-min(x)))}
PTAL.normalization <- normalization(PTAL$AvPTAI2015)
PTAL$PTAL.normalization <- PTAL.normalization
#join the data to basemap P3P27
LondonWard.join.PTAL <- LondonWardSF%>% left_join(PTAL,by=c("gss_code_ward"="Ward Code"))
qtm(LondonWard.join.PTAL,fill="PTAL.normalization")
qtm(LondonWard.join.PTAL,fill="AvPTAI2015")

###Public open space accessibility level(POSAL)
POSAL <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/access_public_open_space_ward.csv",na = "n/a")
POSAL$POSAL.normalization <- normalization(POSAL$`Open Space`)
LondonWard.join.POSAL <- LondonWardSF%>% left_join(POSAL,by=c("gss_code_ward"="WD13CD"))
#qtm(LondonWard.join.POSAL,fill="POSAL.normalization")
#combine two accessibility sub-index
Access.normalization<- merge(POSAL,PTAL, by.x="WD13CD",by.y="Ward Code",all=TRUE)
#remove useless column and rows
Access.normalization[,c(4:8,10:11)] <- NULL
#calculate the mean of two accessibility level
Access.normalization$access.mean <- rowMeans(Access.normalization[c('PTAL.normalization', 'POSAL.normalization')], na.rm=TRUE)
```

```{r}
### Road intersections
node <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/Road Networkoproad_essh_gb/new_road_node.shp",layer="new_road_node")
BNG = "+init=epsg:27700"
nodeBNG <- spTransform(node, BNG)
summary(node)
##qtm(nodeBNG)
nodeBNGSF <- st_as_sf(nodeBNG)
#select the node type"junction"
nodeBNGSF <- nodeBNGSF[which(nodeBNGSF$formOfNode=="junction"),]
nodeBNGSP <- as(nodeBNGSF, "Spatial")
head(node)
#select node point in borough
nodeBNGSP <- remove.duplicates(nodeBNGSP)
nodeBNGSP.final<- nodeBNGSP[LondonWard,]
# tmap view
tmap_mode("view")
tm_shape(LondonWard) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(nodeBNGSP.final) +
  tm_dots(col = "blue")

# count number of point in each ward 
library(GISTools)
poly.counts(nodeBNGSP.final, LondonWard) -> nodecount
##setNames(nodecount, LondonWard@data$nodecount)
London.node <- LondonWardSF
London.node$nodecount <- nodecount   #add number of point to each ward (London.node$nodecount)
#combine the ward area(sq km)
London.node$Square_Kilometres <- LondonWard.join.popdens$`Square Kilometres`
#calculate point/area--point per square kilometre
London.node$PointPerSK <-London.node$nodecount/London.node$Square_Kilometres
qtm(London.node,fill="PointPerSK")
#join with LondonWardSF
London.nodeSF <- append_data(LondonWardSF,London.node, key.shp = "code", key.data = "New.code", ignore.duplicates = TRUE)
```

economic vitality
```{r}
###economic vitality
#CLASS 1  house price
#HousePrice <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/house_price_mean2016.csv",na = "n/a")
#HousePrice.normalization <- normalization(HousePrice$Value)
#HousePrice$HousePrice.normalization <- HousePrice.normalization
#CLASS 2 economic activity
EcoAct <- read_csv("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/economic_activity_ward.csv",na = "n/a")
EcoAct.normalization <- normalization(EcoAct$`Economically active: Total`)
EcoAct$EcoAct.normalization <- EcoAct.normalization

```

land use mix
```{r}
### Land use mix
#https://en.wikipedia.org/wiki/Planning_use_classes_in_England 
# six classes--
landcover <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/greater-london-latest-free.shp/gis_osm_landuse_a_free_1.shp",layer="gis_osm_landuse_a_free_1")
landcoverBNG <- spTransform(landcover, BNG)
landcoverBNGSF <- st_as_sf(landcoverBNG)

AOIS <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/greater-london-latest-free.shp/gis_osm_pois_a_free_1.shp",layer="gis_osm_pois_a_free_1")
AOISBNG <- spTransform(AOIS, BNG)
AOISBNGSF <- st_as_sf(AOISBNG)

#find missing data in each column in landcoverBNGSF
colSums(is.na(landcoverBNGSF))
#get the classes of landcover
summary(landcoverBNGSF)
#CLASS1 greenspace
greenspaceBNGSF <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="park" |  landcoverBNGSF$fclass =="forest"|landcoverBNGSF$fclass =="nature_reserve"|landcoverBNGSF$fclass =="grass"),] 
greenspaceBNGSP <- as(greenspaceBNGSF,"Spatial")
#CLASS2 industrial and commercial
industrialBNGSF <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="industrial"| landcoverBNGSF$fclass =="commercial"),]
industrialBNGSP <- as(industrialBNGSF,"Spatial")
#CLASS 3 residential
residentialBNGSF <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="residential" ),]
residentialBNGSP <- as(residentialBNGSF,"Spatial")
#CLASS 4 recreation-leisure
recreationBNGSF1 <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="recreation_ground"| landcoverBNGSF$fclass =="allotments"),]
recreationBNGSF2 <- AOISBNGSF[ which(AOISBNGSF$fclass =="attraction"|AOISBNGSF$fclass =="theatre"|AOISBNGSF$fclass =="arts_centre"),]
recreationBNGSF <- rbind(recreationBNGSF2,recreationBNGSF1)
recreationBNGSP <- as(recreationBNGSF,"Spatial")


#CLASS 5 retail
retailBNGSF1 <- landcoverBNGSF[ which( landcoverBNGSF$fclass =="retail" ),]
retailBNGSF2 <- AOISBNGSF[ which( AOISBNGSF$fclass =="retail" ),]
retialBNGSF <- rbind(retailBNGSF2,retailBNGSF1)
retailBNGSP <- as(retailBNGSF,"Spatial")

#CLASS 6--community services
communityBNGSF <- AOISBNGSF[ which(AOISBNGSF$fclass =="school"|AOISBNGSF$fclass =="hospital"|AOISBNGSF$fclass =="nursing_home"|AOISBNGSF$fclass =="university"  ),]
communityBNGSP <- as(communityBNGSF,"Spatial")

#CLASS 7--transport
transport <- readOGR("E:/UCL/005-GI System&Science/GIS-Assessment 3/R assessment3/greater-london-latest-free.shp/road_polygon_merge.shp",layer="road_polygon_merge")
transportBNG <- spTransform(transport, BNG)
transportBNG <- transportBNG[transportBNG@data$AREA >10000,] 


```

```{r}
# Assign the land use area for each subset. Here we first use the most widespread residential as example 
library(raster)
r <- raster(ncols=400, nrows=400) #generate raster size(quantity)
extent(r) <- extent(residentialBNGSP)  #Important! Assign the extent of raster to cover the same extents of the polygon
residential.Raster <- rasterize(residentialBNGSP,r,background=NA) #convert polygon to raster
#tmap_mode("view")
#qtm(residential.Raster)+tm_shape(LondonWard) +tm_polygons(col = NA, alpha = 0.5)
#extract rasters from polygons#(extract() cannot use method=bilinear in raster-in-polygon)
residential.extract <- raster::extract(residential.Raster,LondonWard,df=TRUE, weights =FALSE, na.rm = TRUE)
#remove rows with NA
row.has.na <- apply(residential.extract, 1, function(x){any(is.na(x))})
residential.extract2 <-residential.extract[!row.has.na,]
#merge the pixel in same ward(aggregate the frequency of the same data in one)
library(plyr)
residential.extract.sum <- factor(residential.extract2$ID)  
residential.extract.sum <- table(residential.extract2$ID)
residential.extract.wardsum <- as.data.frame(residential.extract.sum)
#create new order number for LondonWardSF, for merge the residential.extract and LondonWardSF by order
LondonWard2SF <- LondonWardSF
LondonWard2SF$Var1 <- 1:630
LondonWard2SF[,'Var1']<-factor(LondonWard2SF[,'Var1'])
#merge two dataframe
residential.extract.final <- merge(residential.extract.wardsum, LondonWard2SF, by="Var1",all=TRUE)
#residential.extract.final [!duplicated(residential.extract.final ), ]
#replace NA values with 0
residential.extract.final[is.na(residential.extract.final)] <- 0
#rename the column
colnames(residential.extract.final)[2] <- "Freq.residential"
#raster cell size
#we have to convert raster to polygon because of raster(). This function only compute the pixel area in longitude/latitude coordiante system
residential.repolygon <- rasterToPolygons(residential.Raster, fun=NULL, n=4, na.rm=TRUE, digits=12, dissolve=TRUE)
#qtm(residential.repolygon)
residential.repolygonSF <- st_as_sf(residential.repolygon) 
residential.repolygonSF$area <- st_area(residential.repolygonSF)
#so the single pixel area is 28999.54 [m^2](0.029 km^2)

```

land use mix functions
```{r}
##based on the details above, we create functions for land use analysis
function.raster <- function(landsp){
  extent(r) <- extent(landsp)
  landsp.Raster <- rasterize(landsp,r,background=NA)
  return(landsp.Raster)
}

function.extract<- function(landsp.Raster){
  landsp.extract <- raster::extract(landsp.Raster,LondonWard,df=TRUE, weights =FALSE, na.rm = TRUE)
  row.has.na <- apply(landsp.extract, 1, function(x){any(is.na(x))})
  landsp.extract2 <-landsp.extract[!row.has.na,]
  landsp.extract.sum <- factor(landsp.extract2$ID)  
  landsp.extract.sum <- table(landsp.extract2$ID)
  landsp.extract.wardsum <- as.data.frame(landsp.extract.sum)
  landsp.extract.final <- merge(landsp.extract.wardsum, LondonWard2SF, by="Var1",all=TRUE)
  landsp.extract.final[is.na(landsp.extract.final)] <- 0
  return(landsp.extract.final)
}
```

```{r}
#green space raster
greenspace.Raster <- function.raster(greenspaceBNGSP)
greenspace.extract.final <- funtion.extract(greenspace.Raster)
colnames(greenspace.extract.final)[2] <- "Freq.greenspace"

#industrial and commercial
industrial.Raster <- function.raster(industrialBNGSP)
industrial.extract.final <- function.extract(industrial.Raster)
colnames(industrial.extract.final)[2] <- "Freq.industrial"
#recreation
recreation.Raster <- function.raster(recreationBNGSP)
recreation.extract.final <- function.extract(recreation.Raster)
colnames(recreation.extract.final)[2] <- "Freq.recreation"
#retail
retail.Raster <- function.raster(recreationBNGSP)
retail.extract.final <- function.extract(retail.Raster)
colnames(retail.extract.final)[2] <- "Freq.retail"
#community
community.Raster <- function.raster(communityBNGSP)
community.extract.final <- function.extract(community.Raster)
colnames(community.extract.final)[2] <- "Freq.community"
#transport
transport.Raster <- function.raster(transportBNG)
transport.extract.final <- function.extract(transport.Raster)
colnames(transport.extract.final)[2] <- "Freq.transport"

tmap_mode("view")
tm_shape(LondonWard) +tm_polygons(col = NA, alpha = 0.2)+qtm(recreation.Raster)
#tm_shape(LondonWard) +tm_polygons(col = NA, alpha = 0.2)+qtm(residential.Raster)

#extract useful columns from multiple dataframe
landuse.dataframe <- residential.extract.final
landuse.dataframe <- merge(landuse.dataframe,greenspace.extract.final, by="Var1",all=TRUE)
landuse.dataframe <- merge(landuse.dataframe,industrial.extract.final, by="Var1",all=TRUE)
landuse.dataframe <- merge(landuse.dataframe,recreation.extract.final, by="Var1",all=TRUE)
landuse.dataframe <- merge(landuse.dataframe,retail.extract.final, by="Var1",all=TRUE)
landuse.dataframe <- merge(landuse.dataframe,community.extract.final, by="Var1",all=TRUE)
landuse.dataframe <- merge(landuse.dataframe,transport.extract.final, by="Var1",all=TRUE)
landuse.dataframe[,c(9:13,15:19,21:25,27:31,33:37,39:43)] <- NULL #remove useless column
colnames(landuse.dataframe)[2] <- "residential.freq" #rename column
head(landuse.dataframe)


```

```{r}

#calculate each ward area
landuse.dataframeSF <-left_join(LondonWardSF,landuse.dataframe,by=c("gss_code_ward"="gss_code_ward.x"))
landuse.dataframeSF$wardArea <- st_area(landuse.dataframeSF)

library(SciViews)
function.landusemix <- function(landusefreq){
  landusearea <- landusefreq*(0.029*0.029)
  landusePercent <- landusearea/landuse.dataframeSF$wardArea
  LUM <- landusePercent*ln(landusePercent)
  return(LUM)
}

landuse.dataframeSF$residential.LUM <- function.landusemix(landuse.dataframeSF$residential.freq)
landuse.dataframeSF$greenspace.LUM <- function.landusemix(landuse.dataframeSF$Freq.greenspace)
landuse.dataframeSF$industrial.LUM <- function.landusemix(landuse.dataframeSF$Freq.industrial)
landuse.dataframeSF$recreation.LUM <- function.landusemix(landuse.dataframeSF$Freq.recreation)
landuse.dataframeSF$retail.LUM <- function.landusemix(landuse.dataframeSF$Freq.retail)
landuse.dataframeSF$community.LUM <- function.landusemix(landuse.dataframeSF$Freq.community)
landuse.dataframeSF$transport.LUM <- function.landusemix(landuse.dataframeSF$Freq.transport)
library(base)
###sum landuse.LUM and calculate index
index.dataframe<- st_set_geometry(landuse.dataframeSF[,c(1:6,18:25)],NULL)
#replace NAN with 0
index.dataframe[is.na(index.dataframe)] <- 0 
###compute land use mix index
index.dataframe$MixIndex <- (abs(rowSums(index.dataframe[,8:14])))/ln(7)

qtm(index.dataframeSF,fill="MixIndex")
```

```{r}

qtm(residential.Raster)
library(tmap)
tmap_mode("view")
tm_shape(LondonWard) +
  tm_polygons(col = NA, alpha = 0.5) +
tm_shape(residentialBNGSP) +
  tm_polygons(col = "blue")
```

```{r}
###combine all subindex 
index.dataframe <- merge(index.dataframe,Density,by.x="gss_code_ward",by.y="New Code",all=TRUE)
#index.dataframe[,3:7] <- NULL
index.dataframe <- merge(index.dataframe,Access.normalization, by.x="gss_code_ward",by.y="WD13CD",all=TRUE)
index.dataframe <- merge(index.dataframe,EcoAct, by.x="gss_code_ward",by.y="New Code",all=TRUE)
index.dataframe <- index.dataframe[,c(1:4,15,20,25,29)]
###calculate the final index of wards,set weights
function.vitality.index <- function(wLand,wDensity,wAccess,wEcoAct){
  Index <- (wLand)*(index.dataframe[,5])+(wDensity)*(index.dataframe[,6])+(wAccess)*(index.dataframe[,7])+(wEcoAct)*(index.dataframe[,8])
  return(Index)
}
#assign weight to each subindex
index.dataframe$Index <- function.vitality.index(0.4,0.3,0.2,0.1)
#join with LondonWardSF and visualisation
index.dataframeSF <-left_join(LondonWardSF,index.dataframe,by=c("gss_code_ward"="gss_code_ward"))
index.dataframeSP <- as(index.dataframeSF, "Spatial")
#create index facet(using tmap)
tmap_mode("view")
tm_shape(index.dataframeSP )+
    tm_polygons(c("Index", "MixIndex","Density.normalization","EcoAct.normalization","access.mean")) +
    tm_facets(sync = TRUE, ncol = 1,nrow=5,drop.empty.facets = F)
```

#Test for the relationship
##Linear regression model

```{r}
###Since we have done the most important part of data manipulation, next we will explore the relationship between intersection and vitality
#histogram of two variables
qplot(PointPerSK, data = London.node, geom = "histogram")
#ggplot(London.node, aes(x="PointPerSK", y=PointPerSK))+geom_boxplot()#(this is another code for boxplot in ggplot2)
#qplot(y=London.node$PointPerSK, x= 0.1, geom = "boxplot")  

qplot(Index, data = index.dataframe, geom = "histogram")
#qplot(y= index.dataframe$Index, x= 0.1, geom = "boxplot")  

#the variables look normally distributed,but we should consider the outliers
plot(London.node$PointPerSK,index.dataframeSF$Index,col="blue")
#after scatter plot, we should remove "Lady Margaret" as its unusual road intersection density(maybe due to the residential road intersections are included)

node.index.frame <-merge(index.dataframe,London.node, by.x="gss_code_ward",by.y="gss_code_ward",all=TRUE) 
#remove outlier--Lady Margaret(E05000181)
#node.index.frame <- node.index.frame[node.index.frame$PointPerSK<300,]
#linear regression plot
qplot(PointPerSK,Index,data=node.index.frame,geom = "point") + stat_smooth(method="lm", se=FALSE, size=1)
#fit the linear model
model <- lm(PointPerSK ~ Index, data =node.index.frame)
#residual dataframe
library(broom)
model_res <- tidy(model)
summary(model)
plot(model)


```

The coefficient (estimate) in the summary() table shows that(on average) for a 1 unit (day) increase in unauthorised absence from school, there is a reduction in the average GCSE point score of -29.874.

The p-values for the intercept and the coefficient are highly statistically significant (<0.001) so we can rely on the relationship that is being observed.

The adjusted R-squared statistic is 0.38, which tells us that 38% of the variation in GCSE scores across Wards in London can be explained by variation in unauthorised absence from school (which is quite a lot for a single variable).

Interrogating the last graph in plot(model1)which is a scatter plot of fitted values (the model estimates achieved by plugging the values and coefficients back into the regression equation) against standardised residuals, we can see no apparent patterns in the cloud of points, which suggests the model has not violated any important assumptions.

##Test for spatial patterns
we shoud test that is there any spatial clustering of residuals.If these places cluster in space, then there might be some unobserved underlying factor causing this. This is important is it means that the assumption of independence that regression models rely upon might be violated.
Now in our case here, there is not a clear violation of spatial independence, but it is certainly hinted at.
 test for spatial patterns (spatial autocorrelation) using the Moran's I statistic
```{r}
### Test for spatial patterns (spatial autocorrelation) using the Moran's I statistic
#copy a new LondonWardSF
LondonWardSF.pattern <- LondonWardSF
LondonWardSF.pattern$model_resids <- model$residuals
LondonWardSF.pattern <- LondonWardSF.pattern[!(is.na(LondonWardSF.pattern$model_resids) ),]
LondonWardSP.pattern <- as(LondonWardSF.pattern,"Spatial")
tmap_mode("view")
qtm(LondonWardSF.pattern,fill='model_resids')


```

```{r}
#library(spdep)
#Calculate the centroids of London wards
centWard <- coordinates(LondonWardSP.pattern)
plot(centWard)
#Generate the spatial weights matrix,using binary matrix of queen's case neighbours
#create neighbours list

library(spdep)
neighbour <- poly2nb(LondonWardSP.pattern,queen=T)
#plot
plot(neighbour,coordinates(centWard),col="red")
plot(LondonWardSP.pattern,add=T)
#create spatial weights object from these weights
SpatialWeight <- nb2listw(neighbour,style="C",zero.policy=T)
#Run moran I test
moran.test(LondonWardSP.pattern@data$model_resids, SpatialWeight,zero.policy=T)
```
so here we have a statistically significant but relatively weak indication that there is some spatial clustering of residual values. A value of 0.07 (1 being perfect spatial autocorrelation, 0 being none at all) shows that there is no evidence that high residual values cluster near high values and low residual values cluster near lower values.


#multi linear regression model
```{r}
###For a deeper research, we will build a multiple linear regression model to find whether the urban vitality index has a relationship with urban vitality sub-factors. If so, which one is the strongest or weakest.
#
library(corrplot)
cormat <- cor(node.index.frame[,c(5:8,16)], use="complete.obs", method="pearson")
corrplot(cormat)
#rRun multiple regression model
model_multi <- lm(PointPerSK ~ MixIndex+Density.normalization + access.mean + EcoAct.normalization, data = node.index.frame)
summary(model_multi)
```

```{r}
##same as residual autocorrelation analysis above
model_res_multi <- tidy(model_multi)
plot(model_multi)
LondonWardSF.pattern$model_resids_multi <- model_multi$residuals
LondonWardSP.pattern <- as(LondonWardSF.pattern,"Spatial")
tmap_mode("view")
qtm(LondonWardSF.pattern,fill='model_resids_multi')
library(spdep)
neighbour <- poly2nb(LondonWardSP.pattern,queen=T)
#create spatial weights object from these weights
SpatialWeight <- nb2listw(neighbour,style="C",zero.policy=T)
#Run moran I test
moran.test(LondonWardSP.pattern@data$model_resids_multi, SpatialWeight,zero.policy=T)
```

